{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Metadata file loaded later in the notebook; the reader parses it one JSON object per line.\n",
    "multimodal_data_info_file_path ='multimodal_data_info.json'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_multimodal_data_information_json_file(json_file_path=\"multimodal_data_info.json\"):\n",
    "    \"\"\"\n",
    "    Load video metadata records from a JSON Lines file (one JSON object per line).\n",
    "\n",
    "    Blank lines are skipped. A line that is not valid JSON, or a record that is\n",
    "    missing a required field, is reported on stdout and skipped; reading then\n",
    "    continues with the next line.\n",
    "\n",
    "    :param json_file_path: path of the UTF-8 encoded file to read\n",
    "    :return: multimodal_data_information_list\n",
    "            [{'mp4_id': '97930081', 'mp4_download_url': ...'video_label': 'Military'},\n",
    "            {'mp4_id': '64413672', 'mp4_download_url': ... 'video_label': 'Military'}]\n",
    "    :raises OSError: if the file cannot be opened\n",
    "    \"\"\"\n",
    "    required_fields = ('mp4_id', 'video_label', 'mp4_time',\n",
    "                       'mp4_download_url', 'mp4_background_image_url', 'mp4_txt_brief')\n",
    "\n",
    "    def check_data(line_dict):\n",
    "        # A usable record is a JSON object that carries every required field.\n",
    "        return isinstance(line_dict, dict) and all(item in line_dict for item in required_fields)\n",
    "\n",
    "    multimodal_data_information_list = list()\n",
    "    with open(json_file_path, 'r', encoding='utf-8') as f:\n",
    "        for line_number, line in enumerate(f, start=1):\n",
    "            if not line.strip():\n",
    "                # e.g. a trailing newline at the end of the file\n",
    "                continue\n",
    "            try:\n",
    "                line_dict = json.loads(line)\n",
    "            except json.JSONDecodeError as error:\n",
    "                # Report and skip the bad line rather than silently dropping\n",
    "                # every record that follows it.\n",
    "                print(\"invalid json at line {}: {}\".format(line_number, error))\n",
    "                continue\n",
    "            if check_data(line_dict):\n",
    "                multimodal_data_information_list.append(line_dict)\n",
    "            else:\n",
    "                print(\"incomplete data:\")\n",
    "                print(line_dict)\n",
    "    return multimodal_data_information_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load every record that carries all required fields into a list of dicts.\n",
    "multimodal_data_information_list = read_multimodal_data_information_json_file(multimodal_data_info_file_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "562342"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of records that passed the required-field check.\n",
    "len(multimodal_data_information_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'mp4_id': '75265848',\n",
       "  'mp4_download_url': 'https://p5-v1.xpccdn.com/075265848_main_xl.mp4',\n",
       "  'mp4_time': '0:13',\n",
       "  'mp4_background_image_url': 'https://p5-i1.xpccdn.com/075265848_iconl.jpeg',\n",
       "  'mp4_txt_brief': ' Old antique German military rifle',\n",
       "  'video_label': 'Military'},\n",
       " {'mp4_id': '44566064',\n",
       "  'mp4_download_url': 'https://p5-v1.xpccdn.com/044566064_main_xl.mp4',\n",
       "  'mp4_time': '0:09',\n",
       "  'mp4_background_image_url': 'https://p5-i1.xpccdn.com/044566064_iconl.jpeg',\n",
       "  'mp4_txt_brief': ' quadcopter aerial drone',\n",
       "  'video_label': 'Military'},\n",
       " {'mp4_id': '62447549',\n",
       "  'mp4_download_url': 'https://p5-v1.xpccdn.com/062447549_main_xl.mp4',\n",
       "  'mp4_time': '0:06',\n",
       "  'mp4_background_image_url': 'https://p5-i1.xpccdn.com/062447549_iconl.jpeg',\n",
       "  'mp4_txt_brief': ' Firearm dis-assembly for cleaning and safety check of handheld gun',\n",
       "  'video_label': 'Military'}]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at the first three records to see which fields each one carries.\n",
    "multimodal_data_information_list[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def multimodal_data_json_file_to_datafram(json_file_path=\"multimodal_data_info.json\"):\n",
    "    \"\"\"\n",
    "    Read the multimodal metadata file and arrange its records as a table.\n",
    "\n",
    "    :param json_file_path: path handed to read_multimodal_data_information_json_file\n",
    "    :return: pandas.DataFrame with one row per record and the columns\n",
    "             mp4_id, video_label, mp4_time, mp4_download_url,\n",
    "             mp4_background_image_url, mp4_txt_brief (in that order)\n",
    "    \"\"\"\n",
    "    column_names = ('mp4_id', 'video_label', 'mp4_time',\n",
    "                    'mp4_download_url', 'mp4_background_image_url', 'mp4_txt_brief')\n",
    "\n",
    "    records = read_multimodal_data_information_json_file(json_file_path)\n",
    "\n",
    "    # One list of values per column, keyed in the same order as column_names.\n",
    "    values_by_column = {name: [record[name] for record in records] for name in column_names}\n",
    "\n",
    "    return pd.DataFrame(values_by_column)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reuse the path defined earlier in the notebook so both loading cells read the same file.\n",
    "multimodal_data_information_datafram = multimodal_data_json_file_to_datafram(json_file_path=multimodal_data_info_file_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mp4_id</th>\n",
       "      <th>video_label</th>\n",
       "      <th>mp4_time</th>\n",
       "      <th>mp4_download_url</th>\n",
       "      <th>mp4_background_image_url</th>\n",
       "      <th>mp4_txt_brief</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>75265848</td>\n",
       "      <td>Military</td>\n",
       "      <td>0:13</td>\n",
       "      <td>https://p5-v1.xpccdn.com/075265848_main_xl.mp4</td>\n",
       "      <td>https://p5-i1.xpccdn.com/075265848_iconl.jpeg</td>\n",
       "      <td>Old antique German military rifle</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>44566064</td>\n",
       "      <td>Military</td>\n",
       "      <td>0:09</td>\n",
       "      <td>https://p5-v1.xpccdn.com/044566064_main_xl.mp4</td>\n",
       "      <td>https://p5-i1.xpccdn.com/044566064_iconl.jpeg</td>\n",
       "      <td>quadcopter aerial drone</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>62447549</td>\n",
       "      <td>Military</td>\n",
       "      <td>0:06</td>\n",
       "      <td>https://p5-v1.xpccdn.com/062447549_main_xl.mp4</td>\n",
       "      <td>https://p5-i1.xpccdn.com/062447549_iconl.jpeg</td>\n",
       "      <td>Firearm dis-assembly for cleaning and safety ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>42966432</td>\n",
       "      <td>Military</td>\n",
       "      <td>0:08</td>\n",
       "      <td>https://p5-v1.xpccdn.com/042966432_main_xl.mp4</td>\n",
       "      <td>https://p5-i1.xpccdn.com/042966432_iconl.jpeg</td>\n",
       "      <td>Kalashnikov deadly weapon</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>103424272</td>\n",
       "      <td>Military</td>\n",
       "      <td>0:13</td>\n",
       "      <td>https://p5-v1.xpccdn.com/103424272_main_xl.mp4</td>\n",
       "      <td>https://p5-i1.xpccdn.com/103424272_iconl.jpeg</td>\n",
       "      <td>Rows of ammunition in front of an animated Le...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      mp4_id video_label mp4_time  \\\n",
       "0   75265848    Military     0:13   \n",
       "1   44566064    Military     0:09   \n",
       "2   62447549    Military     0:06   \n",
       "3   42966432    Military     0:08   \n",
       "4  103424272    Military     0:13   \n",
       "\n",
       "                                 mp4_download_url  \\\n",
       "0  https://p5-v1.xpccdn.com/075265848_main_xl.mp4   \n",
       "1  https://p5-v1.xpccdn.com/044566064_main_xl.mp4   \n",
       "2  https://p5-v1.xpccdn.com/062447549_main_xl.mp4   \n",
       "3  https://p5-v1.xpccdn.com/042966432_main_xl.mp4   \n",
       "4  https://p5-v1.xpccdn.com/103424272_main_xl.mp4   \n",
       "\n",
       "                        mp4_background_image_url  \\\n",
       "0  https://p5-i1.xpccdn.com/075265848_iconl.jpeg   \n",
       "1  https://p5-i1.xpccdn.com/044566064_iconl.jpeg   \n",
       "2  https://p5-i1.xpccdn.com/062447549_iconl.jpeg   \n",
       "3  https://p5-i1.xpccdn.com/042966432_iconl.jpeg   \n",
       "4  https://p5-i1.xpccdn.com/103424272_iconl.jpeg   \n",
       "\n",
       "                                       mp4_txt_brief  \n",
       "0                  Old antique German military rifle  \n",
       "1                            quadcopter aerial drone  \n",
       "2   Firearm dis-assembly for cleaning and safety ...  \n",
       "3                          Kalashnikov deadly weapon  \n",
       "4   Rows of ammunition in front of an animated Le...  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the first rows of the table as a quick sanity check of the columns.\n",
    "multimodal_data_information_datafram.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mp4_id</th>\n",
       "      <th>video_label</th>\n",
       "      <th>mp4_time</th>\n",
       "      <th>mp4_download_url</th>\n",
       "      <th>mp4_background_image_url</th>\n",
       "      <th>mp4_txt_brief</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>562342</td>\n",
       "      <td>562342</td>\n",
       "      <td>562342</td>\n",
       "      <td>562342</td>\n",
       "      <td>562342</td>\n",
       "      <td>562342</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>499607</td>\n",
       "      <td>31</td>\n",
       "      <td>184</td>\n",
       "      <td>499607</td>\n",
       "      <td>499607</td>\n",
       "      <td>343020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>88460884</td>\n",
       "      <td>Alpha Channel</td>\n",
       "      <td>0:10</td>\n",
       "      <td>https://p5-v1.xpccdn.com/023726153_main_xl.mp4</td>\n",
       "      <td>https://p5-i1.xpccdn.com/088460884_iconl.jpeg</td>\n",
       "      <td>Intro Background Texture Render Animation Col...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>9</td>\n",
       "      <td>19200</td>\n",
       "      <td>49660</td>\n",
       "      <td>9</td>\n",
       "      <td>9</td>\n",
       "      <td>10974</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          mp4_id    video_label mp4_time  \\\n",
       "count     562342         562342   562342   \n",
       "unique    499607             31      184   \n",
       "top     88460884  Alpha Channel     0:10   \n",
       "freq           9          19200    49660   \n",
       "\n",
       "                                      mp4_download_url  \\\n",
       "count                                           562342   \n",
       "unique                                          499607   \n",
       "top     https://p5-v1.xpccdn.com/023726153_main_xl.mp4   \n",
       "freq                                                 9   \n",
       "\n",
       "                             mp4_background_image_url  \\\n",
       "count                                          562342   \n",
       "unique                                         499607   \n",
       "top     https://p5-i1.xpccdn.com/088460884_iconl.jpeg   \n",
       "freq                                                9   \n",
       "\n",
       "                                            mp4_txt_brief  \n",
       "count                                              562342  \n",
       "unique                                             343020  \n",
       "top      Intro Background Texture Render Animation Col...  \n",
       "freq                                                10974  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-column summary. In the saved output the row count (562342) exceeds the number of\n",
    "# unique mp4_id values (499607), so some ids appear more than once in the file.\n",
    "multimodal_data_information_datafram.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
